"""Election ad spend versus voter turnout: exploratory analysis.

Loads three CSV exports (advertisers, results, locations), joins the
constituency-level results with the per-location ad spend, and draws a series
of plotly charts.  The commentary that accompanied each chart in the notebook
is kept as comments next to the code that produces it.
"""

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

# Directory that holds the three CSV exports used below.
DATA_DIR = r'C:\Users\HP\Downloads\elections-data'

pio.templates.default = "plotly_white"

## Importing the data files
# 'utf-8-sig' strips a leading byte-order mark so the first column name is clean.
advertiser = pd.read_csv(rf'{DATA_DIR}\advertisers.csv', encoding='utf-8-sig')
result = pd.read_csv(rf'{DATA_DIR}\results.csv', encoding='utf-8-sig')
location = pd.read_csv(rf'{DATA_DIR}\locations.csv', encoding='utf-8-sig')

# Rename columns to identifier-style names (no spaces or brackets).
advertiser = advertiser.rename(
    columns={
        'Page ID': 'Page_ID',
        'Page name': 'Page_name',
        'Amount spent (INR)': 'Amount_spent_INR',
        'Number of ads in Library': 'Ads_count',
    }
)
advertiser.head()

result = result.rename(
    columns={
        '_id': 'id',
        'Sl No': 'Serial_No',
        'PC_Name': 'Parliamentary_Constituency',
        'Total Electors': 'Total_Electors',
        'Polled (%)': 'Polled_Percentage',
        'Total Votes': 'Total_votes',
    }
)
result.head()

location = location.rename(
    columns={
        'Location name': 'Location_name',
        'Amount spent (INR)': 'Amount_spent_INR',
    }
)
location.head()

## Combining the result and location dataframes
# Normalise the join keys: strip() removes unnecessary surrounding whitespace
# and lower() converts every value to lower case, so both sides match.
result['State'] = result['State'].str.strip().str.lower()
# Same normalisation for the location key as for the result key.
location['Location_name'] = location['Location_name'].str.strip().str.lower()

# Left join on State == Location_name.  If there is no match, NaN values are
# used for the columns that come from the locations DataFrame.
merge_data = result.merge(
    location, left_on='State', right_on='Location_name', how='left'
)
merge_data.head()

### Have a look at the total ad spend by state
# Group the data by state and sum the amount spent in each state.
# NOTE(review): if locations.csv holds one row per state, the left join above
# repeats that state's spend on every constituency row, so this sum scales with
# the number of constituencies in the state.  Confirm this is intended.
state_ad_spend = merge_data.groupby('State')['Amount_spent_INR'].sum().reset_index()
state_ad_spend.head()

fig = px.bar(
    state_ad_spend,
    x='State',
    y='Amount_spent_INR',
    labels={'State': 'State', 'Amount_spent_INR': 'Ad_Spend_INR'},
    title='Total Ad Spend by State',
)
fig.update_layout(
    xaxis={'categoryorder': 'total descending'},
    xaxis_tickangle=-90,
    width=800,
    height=600,
)
fig.show()

# The bar graph highlights the total ad spend by state in INR. Uttar Pradesh
# stands out with the highest ad spend, followed by Maharashtra and Odisha.
# States like West Bengal, Tamil Nadu, Andhra Pradesh, and Bihar also have
# significant ad expenditures. On the other hand, places like Lakshadweep,
# Dadra & Nagar Haveli, Daman & Diu, Andaman & Nicobar Islands, and Arunachal
# Pradesh have the lowest ad spend. This pattern shows that bigger and more
# populous states tend to spend more on ads, reflecting their political
# importance and larger voter base.

### Average voter turnout by state
state_voter_turnout = (
    merge_data.groupby('State')['Polled_Percentage'].mean().reset_index()
)
fig = px.bar(
    state_voter_turnout,
    x='State',
    y='Polled_Percentage',
    labels={'State': 'State', 'Polled_Percentage': 'Voter Turnout(%)'},
    title='Average Voter Turnout by State',
)
fig.update_layout(
    xaxis={'categoryorder': 'total descending'},
    xaxis_tickangle=-90,
    width=800,
    height=600,
)
# Explicit show(), like the other charts: outside a notebook cell the figure
# returned by update_layout() is not displayed on its own.
fig.show()

# Lakshadweep tops the list with nearly 80% average voter turnout, closely
# followed by Tripura and Assam. Andhra Pradesh, Sikkim, and West Bengal also
# show high engagement, with turnouts over 70%. On the flip side, Bihar, Uttar
# Pradesh, and Uttarakhand have the lowest average turnout, around 50-55%.
# This highlights significant regional differences in voter participation,
# where smaller states and union territories tend to have higher engagement
# compared to larger states with more ad spend.

### Top 5 parties by ad spend
# Convert the amount to numeric values; anything unparseable becomes NaN.
advertiser['Amount_spent_INR'] = pd.to_numeric(
    advertiser['Amount_spent_INR'], errors='coerce'
)
# Drop the rows whose amount could not be parsed.
advertiser.dropna(subset=['Amount_spent_INR'], inplace=True)

# Group by page name and sum the INR spent, largest first.
party_ad_spend = (
    advertiser.groupby('Page_name')['Amount_spent_INR']
    .sum()
    .sort_values(ascending=False)
)
# The five highest-spending pages.
top_5_parties = party_ad_spend.head(5).reset_index()

colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99', '#c2c2f0']
fig = px.pie(
    top_5_parties,
    values='Amount_spent_INR',
    names='Page_name',
    title=' Top 5 Parties by Ad Spend',
    color_discrete_sequence=colors,
    labels={'Page_name': 'Political Party', 'Amount_spent_INR': 'Ad Spend'},
)
fig.update_layout(
    showlegend=True,
    legend=dict(orientation="v", yanchor="top", y=1, xanchor="left", x=-1),
    title=dict(y=0.95, x=0.5, xanchor='center', yanchor='top'),
    margin=dict(l=200, r=50, t=100, b=50),
)
fig.show()

# The Bharatiya Janata Party (BJP) leads in ad spend, making up 42.3% of the
# total. Following them, Ama Chinha Sankha Chinha accounts for 24.5%, and the
# Indian National Congress has 23.7%. Meanwhile, Ellorum Nammudan and BJP
# Odisha have much lower spends, at 5.19% and 4.27%, respectively.
# This shows that BJP dominates Facebook and Instagram ad spending, with
# nearly half of the total expenditure, highlighting their significant
# investment in advertising compared to other parties.

### The correlation between ad spend and voter turnout
# Select the two columns from the merged data and compute their pairwise
# correlation matrix with corr().
correlation = merge_data[['Amount_spent_INR', 'Polled_Percentage']].corr()
print(correlation)

# The correlation matrix shows that the link between the amount spent (INR)
# and voter turnout (%) is extremely weak and slightly negative, with a
# correlation coefficient of -0.010688. This means there is virtually no
# linear relationship between ad spend and voter turnout. So, spending more on
# ads doesn't really impact voter turnout significantly.

### Relationship between ad spend and voter turnout by parliamentary constituency
# Ensure there are no NaN values in the key columns.
result.dropna(subset=['State'], inplace=True)
location.dropna(subset=['Location_name'], inplace=True)

# Merge the result and location data sets again, using State and
# Location_name as the keys.
merged_constituency_data = result.merge(
    location, left_on='State', right_on='Location_name', how='left'
)

fig = px.scatter(
    merged_constituency_data,
    x='Amount_spent_INR',
    y='Polled_Percentage',
    color='State',
    labels={
        'Amount_spent_INR': 'Ad Spend (INR)',
        'Polled_Percentage': 'Voter Turnout(%)',
    },
    title='Ad spend and voter turnout by Parliamentary Constituency',
)
fig.update_layout(width=800, height=600)
fig.show()

# The scatter plot indicates that higher ad spending doesn't necessarily lead
# to higher voter turnout. Most constituencies show voter turnout clustering
# between 60% and 80%, regardless of the ad spend, which varies from 0 to
# 150 million INR.
# This suggests that factors other than ad spend are likely playing a
# significant role in influencing voter turnout.

### Distribution of ad spend
fig = px.histogram(
    merge_data,
    x='Amount_spent_INR',
    nbins=30,
    marginal='box',
    # Label fixed from 'As_Spend_INR' to match the bar chart's axis label.
    labels={'Amount_spent_INR': 'Ad_Spend_INR'},
    title='Distribution of Ad Spend',
)
fig.update_traces(marker=dict(line=dict(color='black', width=1)))
fig.update_layout(bargap=0.1, width=800, height=600)
fig.show()

# The histogram shows that most constituencies have ad spends clustered around
# the 50M and 100M INR marks, with fewer constituencies spending less than
# 10M INR or more than 150M INR. The box plot reveals a median ad spend of
# about 70M INR, with an interquartile range (IQR) from roughly 30M to
# 110M INR. There are a few outliers, particularly a constituency with a
# notably high ad spend above 150M INR. This distribution indicates that while
# most ad spends fall within a certain range, there are some constituencies
# with significantly higher expenditures.

### Ad spending and voter turnout by election phase
# Aggregate per election phase: sum of the amount spent (INR) and mean of the
# polled percentage.
phase_analysis = (
    merge_data.groupby('Phase')
    .agg({'Amount_spent_INR': 'sum', 'Polled_Percentage': 'mean'})
    .reset_index()
)

# Start from an empty figure so both traces can be added with their own y-axis.
fig = go.Figure()
fig.add_trace(
    go.Bar(
        x=phase_analysis['Phase'],
        y=phase_analysis['Amount_spent_INR'],
        name='Ad_spend_INR',
        marker_color='indianred',
        yaxis='y1',
    )
)
fig.add_trace(
    go.Scatter(
        x=phase_analysis['Phase'],
        y=phase_analysis['Polled_Percentage'],
        name='Voter_turnout_percentage',
        marker_color='lightsalmon',
        yaxis='y2',
    )
)
fig.update_layout(
    title=' Ad spend and voter turnout by Election Phase',
    xaxis=dict(title='Election Phase'),
    # The axis title colour is set through title.font; the older `titlefont`
    # attribute is deprecated and rejected by current plotly releases.
    yaxis=dict(
        title=dict(text='Ad Spend (INR)', font=dict(color='indianred')),
        tickfont=dict(color='indianred'),
    ),
    yaxis2=dict(
        title=dict(text='voter turnout (%)', font=dict(color='lightsalmon')),
        tickfont=dict(color='lightsalmon'),
        overlaying='y',
        side='right',
    ),
    legend=dict(x=0.1, y=1.1, orientation='h'),
    width=800,
    height=600,
)
# Explicit show(), like the other charts: outside a notebook cell the figure
# returned by update_layout() is not displayed on its own.
fig.show()

# There's no clear pattern between ad spend and voter turnout. Phases 1 and 4
# have the highest ad spends, with phase 4 seeing voter turnout peak around
# 70%. Despite the high ad spend, phase 1 has a lower turnout at about 67%.
# Phases with moderate ad spend, like 2 and 6, show lower voter turnout, while
# phase 5 stands out with notably low turnout despite moderate spending.

### Conclusion
# - Higher ad spend does not guarantee higher voter turnout.
# - Voter engagement is influenced by various other factors.
# - Larger, more significant states tend to spend more on ads, but this
#   doesn't always lead to higher voter participation.
# - Political parties, especially the BJP, invest heavily in advertising.
# - The effectiveness of this ad spending in increasing voter turnout remains
#   questionable.